#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Author   : Uyynot
# @Email    : uyynot@qq.com
# @Time     : 2025/7/17 17:15
# @File     : e.py
# @Project  : demoProject
# @Desc     : Build a small list of sample entity documents and export their title/content to sample.csv
# Each row is (document id, text, category). The text is used for both the
# "title" and "content" fields, and every document has difficulty "beginner".
_DOC_ROWS = (
    ("doc_6", "京东 狗东 jd JD 360buy", "general"),
    ("doc_7", "西装品类", "fashion"),
    ("doc_8", "跑步服品类", "sports"),
    ("doc_9", "男装品类", "fashion"),
    ("doc_10", "西游记", "literature"),
    ("doc_11", "MAIA 玛伊娅玛娅 MAIA ACTIVE", "brand"),
    ("doc_12", "斐乐品牌", "brand"),
    ("doc_13", "天猫平台", "e-commerce"),
    ("doc_14", "小野和子品牌", "brand"),
    ("doc_15", "阿里 ali tm 天猫 淘宝 tb", "e-commerce"),
    ("doc_16", "厦门中山路巴黎春天旗舰店", "retail"),
    ("doc_17", "巴黎的春天", "literature"),
    ("doc_18", "萨洛蒙品牌名称： SALOMON/萨洛蒙", "brand"),
    ("doc_19", "露露品牌名称： lululemon", "brand"),
    ("doc_20", "硬糖鞋 糖豆鞋 品类", "footwear"),
    ("doc_21", "母婴鞋", "footwear"),
    ("doc_22", "鬼冢虎品牌", "brand"),
)

# Expand the compact rows into the full document dicts. A fresh "metadata"
# dict is created per document so the entries never share mutable state.
sample_docs = [
    {
        "id": doc_id,
        "title": text,
        "content": text,
        "metadata": {"category": category, "difficulty": "beginner"},
    }
    for doc_id, text, category in _DOC_ROWS
]
import pandas as pd
# Keep only the "title" and "content" fields of each document, relabel them
# as "entity" and "comment", and write the table to sample.csv without the
# row index.
df = (
    pd.DataFrame(sample_docs, columns=["title", "content"])
    .rename(columns={"title": "entity", "content": "comment"})
)
df.to_csv("sample.csv", index=False)
